#!/usr/bin/env bash
# Generates the PPO self-play agents for the 5 classic layouts (5 seeds per layout).

set -euo pipefail

# The same 5 seeds are used for every layout (5 independent self-play runs each).
readonly -a SEEDS=(0 10 20 30 40)

#######################################
# Launch one PPO self-play training run per seed for a single layout.
# Globals:
#   SEEDS (read) - list of RNG seeds, one training run per seed
# Arguments:
#   $1  - layout_name passed to the training config
#   $2  - results subdirectory under reproduced_results/
#   $3  - clip_param
#   $4  - gamma
#   $5  - grad_clip
#   $6  - kl_coeff
#   $7  - lmbda
#   $8  - lr
#   $9  - num_training_iters
#   $10 - reward_shaping_horizon
#   $11 - vf_loss_coeff
# Outputs:
#   training logs from ppo_rllib_client.py; artifacts under reproduced_results/$2
#######################################
train_layout() {
  local layout=$1 results_subdir=$2 clip_param=$3 gamma=$4 grad_clip=$5
  local kl_coeff=$6 lmbda=$7 lr=$8 iters=$9 horizon=${10} vf_loss_coeff=${11}
  local seed
  for seed in "${SEEDS[@]}"; do
    # A failed run is reported but does not abort the remaining runs
    # (runs are independent), matching the original flat script's behavior
    # under plain `sh`/`bash` without -e.
    python ppo_rllib_client.py with \
      "seeds=[${seed}]" \
      "layout_name=${layout}" \
      "clip_param=${clip_param}" \
      "gamma=${gamma}" \
      "grad_clip=${grad_clip}" \
      "kl_coeff=${kl_coeff}" \
      "lmbda=${lmbda}" \
      "lr=${lr}" \
      "num_training_iters=${iters}" \
      old_dynamics=True \
      "reward_shaping_horizon=${horizon}" \
      use_phi=False \
      "vf_loss_coeff=${vf_loss_coeff}" \
      "results_dir=reproduced_results/${results_subdir}" \
      || printf 'warning: training run failed (layout=%s seed=%s)\n' \
           "$layout" "$seed" >&2
  done
}

# Per-layout hyperparameters (tuned independently per layout).
#             layout                   results_subdir               clip  gamma grad  kl    lmbda lr      iters horizon vf
train_layout coordination_ring        ppo_sp_coordination_ring     0.069 0.975 0.359 0.156 0.5 1.6e-4  650 5000000 9.33e-3
train_layout asymmetric_advantages    ppo_sp_asymmetric_advantages 0.229 0.964 0.256 0.185 0.5 2.1e-4  650 5000000 0.022
train_layout forced_coordination      ppo_sp_forced_coordination   0.258 0.972 0.295 0.31  0.6 2.77e-4 650 4000000 0.016
train_layout cramped_room             ppo_sp_cramped_room          0.132 0.964 0.247 0.197 0.6 1.63e-4 550 4500000 9.95e-3
train_layout counter_circuit_o_1order ppo_sp_counter_circuit       0.146 0.978 0.229 0.299 0.6 2.29e-4 650 5000000 9.92e-3
